# Module 1 Demo — Introduction to R and Data Science
# Based on R4DS Chapters 1, 4, 6, and 8
# Hands-on practice with R fundamentals, vectors, RStudio workflow, and project organization

# ===============================
# Part 1: Data Science Process Overview (R4DS Ch. 1)
# ===============================

# Opening banner followed by the six-step data science workflow
# (Import -> Tidy -> Transform -> Visualize -> Model -> Communicate),
# phrased in clinical-programming terms. Emitted with a single cat() call;
# sep = "" keeps each line exactly as written.
cat(
  "=== Welcome to Data Science with R ===\n",
  "Clinical Data Science Workflow:\n",
  "1. IMPORT: Read clinical data (XPT, CSV, Excel)\n",
  "2. TIDY: Structure according to CDISC standards\n",
  "3. TRANSFORM: Create derived variables, apply derivations\n",
  "4. VISUALIZE: Create plots for data review and analysis\n",
  "5. MODEL: Run statistical analyses\n",
  "6. COMMUNICATE: Generate tables, listings, figures\n\n",
  sep = ""
)

# Report which R build is running and where the session is working from
cat(paste("R Version:", R.version.string), "\n")
cat(paste("Working Directory:", getwd()), "\n\n")

# ===============================
# Part 2: Working with Vectors (R4DS Ch. 4)
# ===============================

cat("=== Understanding R Vectors ===\n")

# --- Define the atomic vectors (one element per subject, five subjects) ---

# Logical: TRUE/FALSE flags and conditions
elderly_flag <- c(FALSE, TRUE, FALSE, TRUE, FALSE)
safety_population <- c(TRUE, TRUE, FALSE, TRUE, TRUE)

# Numeric: measurements
height <- c(170, 165, 160, 175, 168)
weight <- c(70.5, 85.2, 55.8, 92.1, 78.3)
age <- c(45, 67, 34, 71, 52)

# Character: identifiers and categories
treatment <- c("Placebo", "Drug A", "Drug A", "Placebo", "Drug A")
usubjid <- c("001-001", "001-002", "001-003", "001-004", "001-005")

# --- Derive new vectors with vectorised operations (no loops needed) ---

# BMI = weight / height^2, with height divided by 100 before squaring
bmi <- weight / (height / 100)^2

# Two-level age grouping: below 65 is "Adult", 65 and above is "Elderly"
age_group <- ifelse(age < 65, "Adult", "Elderly")

# --- Print each vector; cat() separates the elements with spaces ---
cat("Safety Population:", safety_population, "\n")
cat("Elderly Flag:", elderly_flag, "\n")
cat("Ages:", age, "\n")
cat("Weights:", weight, "\n")
cat("Subject IDs:", usubjid, "\n")
cat("Treatments:", treatment, "\n")
cat("Calculated BMI:", round(bmi, 1), "\n")
cat("Age Groups:", age_group, "\n\n")

# ===============================
# Part 3: Tidyverse Packages for Clinical Programming
# ===============================

cat("=== Loading Tidyverse for Clinical Programming ===\n")

# Install packages (run once - commented out after first use)
# install.packages(c("tidyverse", "haven", "lubridate", "here"))

# Load tidyverse and clinical packages
library(tidyverse)  # Attaches the core tidyverse (dplyr, tibble, readr, ggplot2, ...)
library(haven)      # Read/write SAS XPT files
library(lubridate)  # Date/time handling
library(here)       # Robust file paths

# List the packages that belong to the tidyverse.
# tidyverse_packages() returns every member package, not only the attached
# core ones, so the heading says so. print() is explicit because top-level
# values are not auto-printed when the script is run with source(); without
# it the heading would be followed by no output.
cat("Packages that make up the tidyverse:\n")
print(tidyverse_packages())

# Create a clinical dataset using our vectors from Part 2.
# Each argument becomes one column; every vector has one element per subject.
dm <- tibble(
  USUBJID = usubjid,
  AGE = age,
  WEIGHT = weight,
  HEIGHT = height,
  SEX = c("M", "F", "F", "M", "F"),
  TRT01A = treatment,
  RFSTDTC = c("2024-01-15", "2024-01-16", "2024-01-17", "2024-01-18", "2024-01-19"),
  SAFFL = safety_population,
  BMI = round(bmi, 1),  # stored rounded to one decimal place
  AGEGR1 = age_group
)

cat("\nClinical Demographics Dataset:\n")
print(dm)

# glimpse() prints one line per column: name, type and first values
cat("\nDataset Structure:\n")
glimpse(dm)

# ===============================
# Part 4: RStudio Workflow (R4DS Ch. 6)
# ===============================

cat("\n=== RStudio Workflow Best Practices ===\n")

# RStudio interface overview
cat("RStudio has 4 main panes:\n")
cat("1. Script Editor (Top-Left): Write and edit code\n")
cat("2. Console (Bottom-Left): Execute code interactively\n")
cat("3. Environment/History (Top-Right): View objects and command history\n")
cat("4. Files/Plots/Packages/Help (Bottom-Right): Navigate and view outputs\n\n")

# Code organization best practices
cat("Code Organization Tips:\n")
cat("- Use descriptive variable names\n")
cat("- Add comments explaining clinical context\n")
cat("- Use consistent spacing and indentation\n")
cat("- Group related operations together\n\n")

# Example of well-organized clinical code.
# The inputs below are small illustrative values defined here so that the
# example runs on its own; in a real study they would come from study data.
first_dose_date <- as.Date("2024-01-15")
ae_start_date <- as.Date(c("2024-01-10", "2024-01-15", "2024-01-20"))
rfstdtc <- as.Date("2024-01-15")
visit_date <- as.Date(c("2024-01-14", "2024-01-15", "2024-02-15"))

# Create treatment-emergent adverse event flag:
# "Y" when the event starts on or after the first dose, otherwise "N"
ae_teae_flag <- ifelse(ae_start_date >= first_dose_date, "Y", "N")

# Calculate study day from reference start date.
# There is no study day 0: the reference date itself is day 1 and the day
# before it is day -1, so 1 is added only for dates on or after the reference.
day_offset <- as.numeric(visit_date - rfstdtc)
study_day <- ifelse(day_offset >= 0, day_offset + 1, day_offset)

cat("Example TEAE flags:", ae_teae_flag, "\n")
cat("Example study days:", study_day, "\n\n")

cat("Try these RStudio shortcuts:\n")
cat("- Ctrl+Enter (Cmd+Enter): Run current line\n")
cat("- Ctrl+Shift+Enter (Cmd+Shift+Enter): Run entire script\n")
cat("- Tab: Auto-completion\n")
cat("- Ctrl+1/2/3/4: Focus different panes\n\n")

# ===============================
# Part 5: Scripts and Projects (R4DS Ch. 8)
# ===============================

cat("=== Reproducible Scripts and Project Organization ===\n")

# Checklist of what a well-formed R script contains
cat(
  "Good R scripts should include:\n",
  "- Title and purpose\n",
  "- Author and date\n",
  "- Input and output file descriptions\n",
  "- Package loading at the top\n",
  "- Clear section organization\n\n",
  sep = ""
)

# Suggested folder layout for a clinical study project, drawn as a tree.
# sep = "" so every line is emitted exactly as written.
cat(
  "Recommended clinical programming project structure:\n",
  "my_clinical_study/\n",
  "├── my_clinical_study.Rproj\n",
  "├── data/\n",
  "│   ├── raw/           # Original data files\n",
  "│   ├── sdtm/          # SDTM datasets\n",
  "│   └── adam/          # ADAM datasets\n",
  "├── programs/\n",
  "│   ├── sdtm/          # SDTM creation programs\n",
  "│   ├── adam/          # ADAM creation programs\n",
  "│   └── tlf/           # Tables, listings, figures\n",
  "├── outputs/\n",
  "│   ├── datasets/      # Final datasets\n",
  "│   ├── tables/        # Analysis tables\n",
  "│   └── figures/       # Analysis figures\n",
  "└── docs/              # Documentation, protocols\n\n",
  sep = ""
)

# Show where the session is currently working, then two path-handling tips
cat(paste("Current working directory:", getwd()), "\n")
cat(
  "Tip: Use RStudio Projects to avoid setwd() calls\n",
  "Tip: Use here() package for robust file paths\n\n",
  sep = ""
)

# ===============================
# Part 6: Data Manipulation Preview (Using Vectors & Tibbles)
# ===============================

cat("=== Data Manipulation with Tidyverse ===\n")

# Add three derived columns to the demographics tibble in one mutate() call
dm <- mutate(
  dm,
  # "Y" for subjects aged 65 or older, "N" otherwise
  ELDERLY = ifelse(AGE < 65, "N", "Y"),
  # Parse the year-month-day character date into a Date column
  RFSTDT = ymd(RFSTDTC),
  # BMI category: conditions are checked top to bottom, first match wins
  BMICAT = case_when(
    BMI < 18.5 ~ "Underweight",
    BMI < 25 ~ "Normal",
    BMI < 30 ~ "Overweight",
    BMI >= 30 ~ "Obese"
  )
)

cat("Updated dataset with derived variables:\n")
print(dm)

# Safety-population listing: keep rows where SAFFL is TRUE, order them by
# ascending age, and retain only the key columns
dm_summary <- dm %>%
  filter(SAFFL) %>%
  arrange(AGE) %>%
  select(USUBJID, AGE, SEX, TRT01A, ELDERLY, BMICAT)

cat("\nSafety population summary:\n")
print(dm_summary)

# ===============================
# Part 7: Getting Help and Documentation
# ===============================

cat("\n=== R Help System ===\n")

# Help commands to try interactively (printed as text, not executed here)
cat(
  "Try these help commands in the console:\n",
  "?mean        # Help for specific function\n",
  "??regression # Search for functions\n",
  "example(mean) # See function examples\n",
  "args(lm)     # View function arguments\n\n",
  sep = ""
)

# Base R summary() of the AGE column
cat("Summary statistics for age:\n")
print(summary(dm[["AGE"]]))

# dplyr::count() returns one row per distinct value with its frequency in n
cat("\nFrequency counts:\n", "By sex:\n", sep = "")
print(count(dm, SEX))

cat("By treatment:\n")
print(count(dm, TRT01A))

cat("By BMI category:\n")
print(count(dm, BMICAT))

# ===============================
# Part 8: AI-Powered Assistance
# ===============================

# Section banner and lead-in for the comment-driven development examples
cat(
  "\n=== GitHub Copilot for Clinical Programming ===\n",
  "Try typing these comments in RStudio and see Copilot suggestions:\n\n",
  sep = ""
)

# Mean age among subjects flagged as belonging to the safety population
safety_subjects <- filter(dm, SAFFL)
mean_age <- mean(safety_subjects[["AGE"]])
cat("Mean age in safety population:", round(mean_age, 1), "years\n")

# Study day worked example using two fixed dates.
# Note: a real analysis would use actual visit dates from the data.
example_rfstdt <- as.Date("2024-01-15")
example_visit_date <- as.Date("2024-02-15")
# Days elapsed since the reference date, plus 1
study_day_example <- 1 + as.numeric(example_visit_date - example_rfstdt)
cat("Example study day calculation:", study_day_example, "\n")

# Prompt ideas for Copilot, then a reminder to validate what it suggests
cat(
  "\nCommon Copilot prompts for clinical programming:\n",
  "- 'Create elderly flag for age >= 65'\n",
  "- 'Read SAS transport file'\n",
  "- 'Calculate study day from reference date'\n",
  "- 'Convert character date to Date format'\n",
  sep = ""
)

cat("\nRemember: Always validate Copilot suggestions against clinical requirements!\n")

# ===============================
# Part 9: Module 1 Summary and Next Steps
# ===============================

# Closing banner, recap of the topics covered, and the objects created.
# sep = "" so every line is emitted exactly as written.
cat(
  "\n=== Module 1 Complete! 🎉 ===\n",
  "You've successfully learned:\n",
  "✅ Data science process and its application to clinical programming (R4DS Ch. 1)\n",
  "✅ Vector fundamentals - R's building blocks (R4DS Ch. 4)\n",
  "✅ RStudio workflow and interface navigation (R4DS Ch. 6)\n",
  "✅ Script organization and project management (R4DS Ch. 8)\n",
  "✅ Tidyverse packages for clinical data analysis\n",
  "✅ Best practices for reproducible clinical programming\n\n",
  "Key objects created in this demo:\n",
  "- Vectors: age, weight, usubjid, treatment\n",
  "- Tibble: dm (demographics dataset)\n",
  "- Derived variables: BMI, age groups, elderly flags\n\n",
  sep = ""
)

# Show the demographics tibble in its final state
cat("Final demographics dataset:\n")
print(dm)

# Productivity reminders, then a pointer to the next module
cat(
  "\nRStudio Productivity Tips:\n",
  "- Use Ctrl+Enter (Cmd+Enter) to run lines\n",
  "- Use Tab for auto-completion\n",
  "- Use View(dm) to open data viewer\n",
  "- Check Environment pane for all objects\n",
  "- Use Projects for organized workflows\n\n",
  sep = ""
)

cat(
  "🚀 Ready for Module 2: Data Manipulation!\n",
  "In Module 2, you'll learn the five key verbs of data transformation:\n",
  "filter(), select(), mutate(), arrange(), and summarise()\n",
  sep = ""
)
